Challenge 9

Author

Giovanni Librizzi

Revision

My interactive table was grouped, when it should have been ungrouped to allow for more searching.

Setup

library(tidyverse)
library(gt)
library(kableExtra)
library(DT)
names_a <- read_csv(here::here("Week9", "Lab", "StateNames_A.csv"))
names_a <- names_a |>
  rename(Sex = Gender)

3. Summarizing & Visualizing the Number of Allisons

1.

allison_data <- names_a |>
  filter(Name == "Allison") |>
  select(-Year) |>
  group_by(State, Sex) |>
  summarize(n = sum(Count)) |>
  pivot_wider(names_from = Sex,
              values_from = n,
              values_fill = 0) |>
  rename(Male = M, Female = F)

allison_data |>
  head(n = 15) |>
  knitr::kable(
    caption = "Number of Babies Named \"Allison\" by State and Sex (head=15)"
  ) |>
  kable_styling(font_size = 15)
Number of Babies Named "Allison" by State and Sex (head=15)
State Female Male
AK 232 0
AL 1535 0
AR 1198 0
AZ 1880 0
CA 12413 0
CO 1594 0
CT 1099 0
DC 321 0
DE 294 0
FL 4455 0
GA 3257 0
HI 183 0
IA 1477 0
ID 451 0
IL 5110 0
DT::datatable(names_a)

2.

allison_f <- names_a |>
  filter(Name == "Allison", 
         Sex == "F") |>
  select(-State) |>
  group_by(Year) |>
  summarize(n = sum(Count))

allison_f |>
  head(n = 15) |>
  knitr::kable(
    caption = "Number of Babies Named \"Allison\" with Female Sex by Year (head=15)"
  ) |>
  kable_styling(font_size = 15)
Number of Babies Named "Allison" with Female Sex by Year (head=15)
Year n
1997 7274
1998 7861
1999 7023
2000 6314
2001 6209
2002 6237
2003 5850
2004 5871
2005 5631
2006 5560
2007 5450
2008 6237
2009 6579
2010 5856
2011 5453

3.

allison_f |>
  ggplot(mapping = aes(x = Year, y = n)) +
  geom_line() +
  geom_point() +
  labs(y = NULL,
       title = "Number of Babies With The Name Allison By Year"
       )

4. Modeling the Number of Allisons

  1. Fit a linear model with the year as the explanatory variable, and the number of Allisons as the response. Similar to #3, each year should have one observation–the total number of Allisons born that year.
allison_lm <- lm(n ~ Year, data = allison_f)
  1. Write out the estimated regression equation.

    n = 209689.8 - 101*Year

  1. Plot the residuals of the model, that is, the actual values minus the predicted values. Comment on the residuals - do you see any patterns?
allison_lm |>
  broom::augment() |>
  ggplot(mapping = aes(y = .resid, x = .fitted)) +
  geom_point() +
  labs(x = "Fitted",
       y = "Residuals",
       title = "Residuals of the Allison Model"
       )

The residuals start tight and spread out over time on the y-axis, so at the end they are very spread out. They almost seem to trend downward too.

  1. From the spread of this model, we can see the name is very varied in popularity, but does seem to be more unpopular now

5. Spelling by State

8.

alan_spelling <- names_a |>
  filter(Name %in% c("Allan", "Alan", "Allen")) |>
  group_by(Name, Year) |>
  summarize(n = sum(Count))

alan_spelling %>%
  head(n = 15) %>%
  knitr::kable(
    caption = "Number of Babies Named Different Allen Spellings by Year (head=15)",
    booktabs = TRUE
  ) %>%
  kable_styling(font_size = 10,
                latex_options = "striped",
                )
Number of Babies Named Different Allen Spellings by Year (head=15)
Name Year n
Alan 1997 2155
Alan 1998 2102
Alan 1999 2220
Alan 2000 2398
Alan 2001 2622
Alan 2002 2591
Alan 2003 3083
Alan 2004 3051
Alan 2005 3189
Alan 2006 3442
Alan 2007 3225
Alan 2008 3002
Alan 2009 2817
Alan 2010 2487
Alan 2011 2321
alan_spelling |>
  ggplot(mapping = aes(x = Year, y = n, color = Name)) +
  geom_line() +
  geom_point() +
  labs(
    y = NULL,
    title = "Number of Babies with Different Spellings of Allen by Year"
  )

9.

allan_state <- names_a |>
  filter(Name %in% c("Allan", "Alan", "Allen"),
         Year == 2000,
         Sex == "M",
         State %in% c("CA", "PA")) |>
  select(-Sex, -Year) |>
  group_by(Name, State) 

allan_state_wide <- allan_state |>
  pivot_wider(names_from = Name,
              values_from = Count,
              values_fill = 0)

allan_state_wide |>
  knitr::kable(
    caption = "Babies Born with Differing Spellings of Alan by State"
  )
Babies Born with Differing Spellings of Alan by State
State Alan Allen Allan
CA 579 176 131
PA 51 56 12
allan_percent <- allan_state |>
  group_by(State) |>
  mutate(prop = Count / sum(Count)) |>
  select(-Count) |>
  pivot_wider(names_from = Name,
              values_from = prop,
              values_fill = 0)

# Why is it moving the state column??????? 
allan_percent %>%
  gt() %>%
  fmt_percent(columns = Alan:Allan, decimals = 2)
Alan Allen Allan
CA
65.35% 19.86% 14.79%
PA
42.86% 47.06% 10.08%